********************************************************************************
* This do file codes the manufacturing census data 

* Last modified by: Jie Bai
* Date: 08/06/2017
********************************************************************************


* ---- Session setup -----------------------------------------------------------
* Start from an empty session, turn off output paging, then set the maximum
* matrix size and the graph scheme used for the rest of the run.
clear all
set more off
set matsize 11000
set scheme s1color

* ---- Working directory -------------------------------------------------------
* -capture- suppresses the error if this folder does not exist on the current
* machine; in that case the relative paths below resolve against whatever the
* working directory already is.
capture cd "/Users/Dropbox/Chinese food exports"

* ---- Input data --------------------------------------------------------------
* Load the cleaned manufacturing survey panel.
use "Data/Manufacturing survey/manufacturing_survey_cleaned_1998_2013", clear

* Give the raw survey fields descriptive names (raw name -> new name).
rename hylb   industry4digit
rename djzclx registration_type
rename gykgqk ownership_type
rename kysjn  establishment_year
rename cyrs   employment
rename cpxslr salesprofits
rename zjtrhj intermediates
rename zyywsr salesrevenue
rename ckjhz  exportsvalue

* Total costs are the sum of the two raw cost fields; the raw fields are not
* needed afterwards.
generate totalcosts = zyywcb + zycb
drop zyywcb zycb

* 2-digit industry = first two characters of the (string) 4-digit code.
generate industry2digit = substr(industry4digit, 1, 2)

* Dairy indicator. Two different pairs of 4-digit codes are used: one pair for
* years up to 2002 and another for later years.
generate dairy = cond(year > 2002, ///
	inlist(industry4digit, "1534", "1440"), ///
	inlist(industry4digit, "1493", "1420"))

* Where sales revenue is missing, reconstruct it as profits plus total costs.
replace salesrevenue = salesprofits + totalcosts if salesrevenue == .

***fill in missing exporting values from customs data

* Step 1: take the census firm-years whose export value is missing and keep
* only those firms that appear in the census-to-customs ID crosswalk.
preserve
keep if exportsvalue==.
merge m:1 panelid using "Data/firm_codebook/firmID_census2customs.dta"
keep if _m==3
drop _m
tempfile fillinexp
save `fillinexp', replace
restore

* Step 2: build yearly customs export totals per party_id, match them to the
* Step 1 firm-years, and convert them with the yearly exchange rate.
preserve
use "Data/ChineseCustomsData/customs_yearly_2000_2013.dta", clear
keep if import==0
collapse (sum) value, by(party_id year)
tempfile customexp
save `customexp', replace
use `fillinexp', clear
merge 1:1 party_id year using `customexp'
keep if _m==3
drop _m
merge m:1 year using "Data/other_codebook/usdtocnyexchangerate_2000_2013.dta"
drop if _m==2
drop _m
* NOTE(review): presumably converts USD customs values into the unit used by
* the census export field (thousand RMB) - confirm against the data sources.
replace value=value*exchrate/10^3
keep panelid year value
save `fillinexp', replace
restore

* Step 3: bring the customs-based values back into the census panel.
* This merge used to be run twice in a row; the second pass could not change
* anything (the variable "value" already existed in the master data), so a
* single merge gives the same data.
* The merge indicator _m is deliberately left in memory here: it is dropped
* further down, after the private-ownership dummy is built.
merge 1:1 panelid year using `fillinexp'
replace exportsvalue=value if exportsvalue==.
* Firm-years that still have no export value are coded as zero exports.
replace exportsvalue=0 if exportsvalue==.

* Domestic sales = total sales revenue minus exports; negative results are
* treated as invalid and set to missing.
gen dsalesrevenue=salesrevenue-exportsvalue
replace dsalesrevenue=. if dsalesrevenue<0


* Rescale the monetary variables by 1/1000  //in million RMB
foreach v of varlist salesrevenue dsalesrevenue salesprofits intermediates {
	replace `v'=`v'/10^3
}

* For dairy firms, blank out (rescaled) revenue figures below fixed cutoffs.
replace salesrevenue=. if dairy==1 & salesrevenue<2.893
replace dsalesrevenue=. if dairy==1 & dsalesrevenue<1.049

* Log versions; non-positive or missing inputs yield missing logs.
foreach v of varlist employment salesrevenue dsalesrevenue salesprofits {
	generate ln`v'=log(`v')
}

* Indicator for 2-digit industries 14 and 15.
generate food=inlist(industry2digit, "14","15") //food and drinks

* The merge indicator left over from the customs merge above is not needed
* any more; remove it so that the next merge can create a fresh one.
drop _m

* Observation-level flag: 1 if the registration type is one of the listed
* codes, 0 for any other non-empty code, missing when the code is empty.
tempvar privobs
generate `privobs'=inlist(registration_type,"170","171","172","173","174") if registration_type!=""

* Firm-level dummy = most frequent value of the flag within the firm;
* ties are resolved towards the smaller value (minmode).
bysort panelid: egen private=mode(`privobs'),minmode
drop `privobs'

* Attach the firm-level inspection list; list entries without a matching firm
* in the panel are discarded.
merge m:1 panelid using "Data/inspection_lists/inspection_list_manucensus.dta"
tab _m 
drop if _m==2
drop _m

* Firms that are not on the list have missing flags: recode those to 0.
foreach flag of varlist affected innocent round1 round2 round3 round4 {
	replace `flag'=0 if `flag'==.
}

* Firms that are neither affected nor innocent.
generate noninspected=(affected==0 & innocent==0)

* Post-period indicator: 1 for years after 2008, 0 otherwise.
generate post=(year>2008)

* Interactions of each group indicator with the post-period indicator.
foreach grp in affected innocent dairy food {
	generate `grp'Xpost=`grp'*post
}

//other variables
* R&D and advertising expenditure: rename the raw fields, rescale them by
* 1/1000, and build logs with a 0.1 offset so that zeros stay defined.
rename yjkff research
rename ggf advertising

foreach v of varlist research advertising {
	replace `v'=`v'/10^3
}

foreach v of varlist research advertising {
	generate l`v'=log(`v'+0.1)
}

//label

* Firm identifiers and basic characteristics
label variable year               "Year"
label variable prov               "Province"
label variable industry4digit     "4-digit industry code"
label variable registration_type  "Registration type"
label variable ownership_type     "Ownership type"
label variable establishment_year "Years of establishment"
label variable employment         "Employment"

* Financial variables
label variable salesrevenue  "Sales revenue"
label variable dsalesrevenue "Domestic sales revenue"
label variable salesprofits  "Sales profits"
label variable totalcosts    "Total costs"
label variable intermediates "Intermediary inputs"
label variable research      "R&D expenditure"
label variable advertising   "Advertising expenditure"

* Logs
label variable lnemployment    "Log (Employment)"
label variable lnsalesrevenue  "Log (sales revenue)"
label variable lndsalesrevenue "Log (domestic sales revenue)"
label variable lnsalesprofits  "Log (sales profits)"

* Group indicators
label variable dairy      "Dairy (dummy)"
label variable food       "Food (dummy)"
label variable private    "Private (dummy)"
label variable affected   "Contaminated Firms"
label variable innocent   "Innocent Firms"
label variable subsidiary "Sister of contaminated firms"
label variable brand_id   "Sister network id by brand"

* Post-period indicator and interactions
label variable post          "Post"
label variable affectedXpost "CFirmsXPost"
label variable innocentXpost "IFirmsXPost"
label variable dairyXpost    "DairyXPost"
label variable foodXpost     "FoodXPost"

* Write the coded panel to disk (overwrites any existing copy).
save "Data/coded data/manucensus_coded",replace

***firms baseline sales revenue
use "Data/coded data/manucensus_coded",clear

* Baseline window: years 2005 up to (but not including) 2008.
keep if year>=2005 & year<2008

* Firm-level total, maximum and mean of sales revenue over the window.
bysort panelid: egen firm_totsales_bl=total(salesrevenue)
bysort panelid: egen firm_maxsales_bl=max(salesrevenue)
bysort panelid: egen firm_meansales_bl=mean(salesrevenue)

* Collapse to one row per firm: the statistics are constant within a firm, so
* after keeping only these columns every firm's rows are exact duplicates.
keep panelid firm_totsales_bl firm_maxsales_bl firm_meansales_bl
duplicates drop

foreach stat of varlist firm_totsales_bl firm_maxsales_bl firm_meansales_bl {
	summarize `stat', detail
}

* The total over the window is the baseline sales measure used downstream.
generate firm_sales_bl=firm_totsales_bl

sort panelid

save "Data/Coded data/manu_firmssales_bl",replace


***firms baseline age
use "Data/coded data/manucensus_coded",clear
keep panelid year age
sort panelid year

* One row per firm with one age column per year (age1998 ... age2013).
reshape wide age, i(panelid) j(year)

* Walk backwards from 2012 to 1998: whenever next year's age is known, set
* this year's age to one less; ages that come out as zero or negative are
* set to missing.
forvalues yr=2012(-1)1998 {
	local nxt=`yr'+1
	replace age`yr'=age`nxt'-1 if age`nxt'!=.
	replace age`yr'=. if age`yr'<=0
}

* Back to one row per firm-year.
reshape long 

sort panelid year

* Baseline window: years 2005 up to (but not including) 2008.
keep if year>=2005 & year<2008

* Baseline age = largest age observed for the firm inside the window.
duplicates drop panelid year,force
bys panelid: egen firm_age_bl=max(age)
duplicates drop panelid,force

tab firm_age_bl,m  //% one-year firms

save "Data/Coded data/manu_firmsage_bl",replace



***industry level baseline characteristics
use "Data/coded data/manucensus_coded",clear

* Baseline window: years 2005 up to (but not including) 2008.
keep if year>=2005 & year<2008

* Firm-level total sales over the window, then the median of those totals
* within each 2-digit industry (despite the "avg" in the variable name).
collapse (sum) salesrevenue,by(panelid industry2digit)
bysort industry2digit: egen id2avg_sales_bl=median(salesrevenue)

* One row per industry: the median is constant within an industry, so rows
* are exact duplicates once only these two columns are kept.
keep industry2digit id2avg_sales_bl
duplicates drop

save "Data/Coded data/manu_indcha_bl",replace


***affected sourcing province
* List of provinces (variable prov) that contain at least one affected firm.
use "Data/coded data/manucensus_coded",clear
keep if affected==1 
keep prov affected
duplicates drop prov,force

save "Data/Coded data/manu_affectedprov_firmloc",replace


***affected sourcing city
* Same list at the city level (variable city).
use "Data/coded data/manucensus_coded",clear
keep if affected==1 
keep city affected
duplicates drop city,force

save "Data/Coded data/manu_affectedcity_firmloc",replace


***baseline characteristics
use "Data/coded data/manucensus_coded", clear

* Firm-level baseline sales. Firms without a baseline record keep missing
* values; records that match no firm in the panel are discarded.
merge m:1 panelid using "Data/Coded data/manu_firmssales_bl"
foreach v of varlist firm_totsales_bl firm_maxsales_bl firm_meansales_bl {
	replace `v'=. if _m==1
}
drop if _m==2
drop _m

* Firm-level baseline age.
merge m:1 panelid using "Data/Coded data/manu_firmsage_bl"
replace firm_age_bl=. if _m==1
drop if _m==2
drop _m

* Industry-level baseline sales benchmark.
merge m:1 industry2digit using "Data/Coded data/manu_indcha_bl"
replace id2avg_sales_bl=. if _m==1
drop if _m==2
drop _m

* Location flags: 1 if the observation's province/city is on the list of
* locations with an affected firm, 0 otherwise, missing when the location
* itself is missing.
* -merge- matches missing key values to each other, so if the list contains a
* missing province/city, observations with a missing location come back as
* matched. The flag is therefore defined in a single step restricted to
* non-missing locations, which keeps such rows missing instead of coding
* them as 1.
merge m:1 prov using "Data/Coded data/manu_affectedprov_firmloc"
tab _m
g affected_firmprovloc=(_m==3) if prov!=.
drop _m

merge m:1 city using "Data/Coded data/manu_affectedcity_firmloc"
tab _m
g affected_firmcityloc=(_m==3) if city!=.
drop _m


* Size and experience dummies built from the baseline measures.
* In Stata a missing value compares as larger than any number, so an
* unguarded (x>k) evaluates to 1 when x is missing. Firms with no baseline
* record would then be coded as large/experienced. Each dummy is therefore
* left missing whenever one of its inputs is missing.
g largesize=(firm_sales_bl>id2avg_sales_bl) if !missing(firm_sales_bl, id2avg_sales_bl)

* experiencedK = 1 if baseline age exceeds K years.
foreach k of numlist 1 2 3 4 5 10 {
	g experienced`k'=(firm_age_bl>`k') if !missing(firm_age_bl)
}

* Location flags interacted with the post-period indicator.
gen firmprovlocXpost=affected_firmprovloc*post
gen firmcitylocXpost=affected_firmcityloc*post

* Numeric group identifiers for province, city, and their year cells.
egen provid=group(prov)
egen prov_year=group(prov year)

egen cityid=group(city)
egen city_year=group(city year)

label var firmprovlocXpost "FirmProvLocXPost"
label var firmcitylocXpost "FirmCityLocXPost"

* Overwrite the coded panel with the version that includes the baseline
* characteristics.
save "Data/coded data/manucensus_coded.dta",replace
